########################################################
## PROGRAM NAME: 101_spatial_stats.R                  ##
## AUTHOR: MATT MLECZKO                               ##
## INPUTS:                                            ##
##          100_zri_t2.csv                            ##
##          100_zri_t1.csv                            ##
##          005_muni_msa_2022.Rda                     ##
##                                                    ##
## OUTPUTS:                                           ##
##          101_nzlu_t2_sp_res.Rda                    ##
##          101_nzlu_t1_sp_res.Rda                    ##
##                                                    ## 
## PURPOSE: Create maps, spatial measures             ##
##          (create Figures 1,2)                      ##
##                                                    ##
## LIST OF UPDATES:                                   ##
########################################################

#log <- file(# USER DEFINED PATH AND FILE NAME #)
#sink(log, append=TRUE)
#sink(log, append=TRUE, type="message")

##
## some of this code was developed with help from Microsoft copilot
##

# Load necessary libraries
library(sf)
library(dplyr)
library(sp)
library(ggplot2)
library(tigris)
library(stringr)
library(cowplot)

## create a merge function that creates merge frequency as in Stata ##
## from user rwbuie at stackoverflow: https://stackoverflow.com/questions/30358401/is-there-a-way-to-create-statas-merge-indicator-variable-with-rs-merge ##
stata.merge <- function(x,y, name){
  x$df1 <- 1
  y$df2 <- 2
  df <- merge(x,y, by = name, all = TRUE)
  df$merge.variable <- rowSums(df[,c("df1", "df2")], na.rm=TRUE)
  df$df1 <- NULL
  df$df2<- NULL
  df
  #print(table(df$merge.variable))
  
  ## return the merged dataframe
  return(df)
}

## set directory ##

data_path <- # USER DEFINED PATH HERE #
setwd(data_path)

nzlu.munis.t2.in <- read.csv("100_zri_t2.csv")
nzlu.munis.t2.in$GEOID_full <- as.character(nzlu.munis.t2.in$GEOID_full)

range(nchar(nzlu.munis.t2.in$GEOID_full))

nzlu.munis.t2 <- nzlu.munis.t2.in %>%
  mutate(GEOID_full_rf = case_when(nchar(GEOID_full) == 6 ~ str_pad(GEOID_full,7,"left","0"),
                                   nchar(GEOID_full) == 9 ~ str_pad(GEOID_full,10,"left","0"),
                                   TRUE ~ GEOID_full)) %>%
  select(-GEOID_full) %>%
  rename(GEOID_full = GEOID_full_rf)

range(nchar(nzlu.munis.t2$GEOID_full))


nzlu.munis.t1.in <- read.csv("100_zri_t1.csv")
nzlu.munis.t1.in$GEOID_full <- as.character(nzlu.munis.t1.in$GEOID_full)

range(nchar(nzlu.munis.t1.in$GEOID_full))

nzlu.munis.t1 <- nzlu.munis.t1.in %>%
  mutate(GEOID_full_rf = case_when(nchar(GEOID_full) == 6 ~ str_pad(GEOID_full,7,"left","0"),
                                   nchar(GEOID_full) == 9 ~ str_pad(GEOID_full,10,"left","0"),
                                   TRUE ~ GEOID_full)) %>%
  select(-GEOID_full) %>%
  rename(GEOID_full = GEOID_full_rf)

range(nchar(nzlu.munis.t1$GEOID_full))


## get shapefiles ## 

## states to loop through ##
states <- c("AL","AK","AZ","AR","CA","CO","CT","DE",
            "DC","FL","GA","HI","ID","IL","IN","IA","KS",
            "KY","LA","ME","MD","MA","MI","MN","MS","MO",
            "MT","NE","NV","NH","NJ","NM","NY","NC","ND",
            "OH","OK","OR","PA","RI","SC","SD","TN","TX",
            "UT","VT","VA","WA","WV","WI","WY")

## initialize lists to store data frames ##
st.cs <- list()
st.pl <- list() 

## initialize counter ## 
state.counter <- 1

## start the loop ##
for (st in states){
  
  pl <- places(year = 2011,
               state = st)
  
  cs <- county_subdivisions(year = 2011,
                            state = st)
  
  ## store the data frame in the list ## 
  st.pl[[state.counter]] <- pl
  st.cs[[state.counter]] <- cs
  
  ## increase interval by 1 ## 
  state.counter <- state.counter + 1
  
}

##
## 2019-2022 (time period 2) 
##

## combine shapefiles and muni files

st.pl.df <- bind_rows(st.pl) %>%
  mutate(GEOID_full = paste0(STATEFP,
                             PLACEFP))

st.pl.df$GEOID_full[st.pl.df$GEOID_full == "1571550"] <- "1517000"

range(nchar(st.pl.df$GEOID_full))

pl.geoids <- st.pl.df %>%
  select(GEOID_full)

st.cs.df <- bind_rows(st.cs) %>%
  mutate(GEOID = paste0(STATEFP,
                        COUSUBFP),
         GEOID_full = paste0(STATEFP,
                             COUNTYFP,
                             COUSUBFP))

st.cs.df$GEOID_full[st.cs.df$GEOID_full == "1711930107"] <- "1711930094"
st.cs.df$GEOID_full[st.cs.df$GEOID_full == "2502178972"] <- "2502178865"
st.cs.df$GEOID_full[st.cs.df$GEOID_full == "3911383090"] <- "3911383111"
st.cs.df$GEOID_full[st.cs.df$GEOID_full == "3913947152"] <- "3913947138"


range(nchar(st.cs.df$GEOID_full))

cs.geoids <- as.data.frame(st.cs.df) %>%
  select(GEOID_full)

nzlu.munis.t2.wsf1 <- stata.merge(nzlu.munis.t2,
                                  pl.geoids,
                                  "GEOID_full")

## check merge ##
table(nzlu.munis.t2.wsf1$merge.variable, useNA = "ifany")

nzlu.munis.t2.ksf1 <- nzlu.munis.t2.wsf1 %>%
  filter(merge.variable == 3)

nzlu.munis.t2.fm2 <- nzlu.munis.t2.wsf1 %>%
  filter(merge.variable == 1) %>%
  select(GEOID_full) 

range(nchar(nzlu.munis.t2.fm2$GEOID_full))

nzlu.munis.t2.wsf2 <- stata.merge(nzlu.munis.t2.fm2,
                                  cs.geoids,
                                  "GEOID_full")

table(nzlu.munis.t2.wsf2$merge.variable, useNA = "ifany")

nzlu.munis.t2.ksf2 <- nzlu.munis.t2.wsf2 %>%
  filter(merge.variable == 3) %>%
  select(-merge.variable)

ncol(nzlu.munis.t2.ksf1)
ncol(nzlu.munis.t2.ksf2)

pl.df.fin <- st.pl.df %>%
  filter(GEOID_full %in% nzlu.munis.t2.ksf1$GEOID_full)

nrow(pl.df.fin) == nrow(nzlu.munis.t2.ksf1)

cs.df.fin <- st.cs.df %>%
  filter(GEOID_full %in% nzlu.munis.t2.ksf2$GEOID_full)

nrow(cs.df.fin) == nrow(nzlu.munis.t2.ksf2)

nrow(pl.df.fin) + nrow(cs.df.fin) == nrow(nzlu.munis.t2)

## convert to sf objects

pl.sf <- st_as_sf(pl.df.fin) %>%
  select(GEOID_full,
         NAME,
         NAMELSAD,
         LSAD,
         FUNCSTAT,
         ALAND,
         AWATER,
         INTPTLAT,
         INTPTLON,
         geometry)
         
cs.sf <- st_as_sf(cs.df.fin) %>%
  select(GEOID_full,
         NAME,
         NAMELSAD,
         LSAD,
         FUNCSTAT,
         ALAND,
         AWATER,
         INTPTLAT,
         INTPTLON,
         geometry)

ncol(pl.sf)
ncol(cs.sf)

all.sf <- rbind(pl.sf,
                cs.sf)

nzlu.munis.t2.wsf.m <- stata.merge(nzlu.munis.t2,
                                   all.sf,
                                   "GEOID_full")

## check merge ##
table(nzlu.munis.t2.wsf.m$merge.variable, useNA="ifany")

nzlu.munis.t2.wsf <- nzlu.munis.t2.wsf.m %>%
  filter(merge.variable == 3) %>%
  select(-merge.variable)

names(nzlu.munis.t2.wsf)

nzlu.munis.t2.sf <- st_as_sf(nzlu.munis.t2.wsf)

# Calculate the median of the index variable within a 10-mile radius (expressed in meters)
radius <- 10 * 1609.34

# Create a buffer around each point
nzlu.munis.t2.sf.buff <- st_buffer(nzlu.munis.t2.sf, dist = radius)

# Perform the spatial join to calculate statistics
nzlu.munis.t2.sf.fin <- st_join(nzlu.munis.t2.sf.buff, nzlu.munis.t2.sf) %>%
  group_by(GEOID_full.x) %>%
  summarize(zri_st_2022_median = median(zri_st_2022.y, na.rm = TRUE), .groups = "drop") %>%
  rename(GEOID_full = GEOID_full.x)

## export non-shapefile version ##
nzlu.munis.t2.spst <- as.data.frame(nzlu.munis.t2.sf.fin)
class(nzlu.munis.t2.spst)

## export file ##

save(nzlu.munis.t2.spst, 
     file = "101_nzlu_t2_sp_res.Rda")


##
## 2004-2006 (time period 1) 
##


## combine shapefiles and muni files

nzlu.munis.t1.wsf1 <- stata.merge(nzlu.munis.t1,
                                  pl.geoids,
                                  "GEOID_full")

## check merge ##
table(nzlu.munis.t1.wsf1$merge.variable, useNA = "ifany")

nzlu.munis.t1.ksf1 <- nzlu.munis.t1.wsf1 %>%
  filter(merge.variable == 3)

nzlu.munis.t1.fm2 <- nzlu.munis.t1.wsf1 %>%
  filter(merge.variable == 1) %>%
  select(GEOID_full) 

range(nchar(nzlu.munis.t1.fm2$GEOID_full))

nzlu.munis.t1.wsf2 <- stata.merge(nzlu.munis.t1.fm2,
                                  cs.geoids,
                                  "GEOID_full")

table(nzlu.munis.t1.wsf2$merge.variable, useNA = "ifany")

nzlu.munis.t1.ksf2 <- nzlu.munis.t1.wsf2 %>%
  filter(merge.variable == 3) %>%
  select(-merge.variable)

ncol(nzlu.munis.t1.ksf1)
ncol(nzlu.munis.t1.ksf2)

pl.df.fin <- st.pl.df %>%
  filter(GEOID_full %in% nzlu.munis.t1.ksf1$GEOID_full)

nrow(pl.df.fin) == nrow(nzlu.munis.t1.ksf1)

cs.df.fin <- st.cs.df %>%
  filter(GEOID_full %in% nzlu.munis.t1.ksf2$GEOID_full)

nrow(cs.df.fin) == nrow(nzlu.munis.t1.ksf2)

nrow(pl.df.fin) + nrow(cs.df.fin) == nrow(nzlu.munis.t1)

## convert to sf objects

nzlu.munis.t1.wsf.m <- stata.merge(nzlu.munis.t1,
                                   all.sf,
                                   "GEOID_full")

## check merge ##
table(nzlu.munis.t1.wsf.m$merge.variable, useNA="ifany")

nzlu.munis.t1.wsf <- nzlu.munis.t1.wsf.m %>%
  filter(merge.variable == 3) %>%
  select(-merge.variable)

names(nzlu.munis.t1.wsf)

nzlu.munis.t1.sf <- st_as_sf(nzlu.munis.t1.wsf)

# Calculate the median of the index variable within a 10-mile radius (expressed in meters)
radius <- 10 * 1609.34

# Create a buffer around each point
nzlu.munis.t1.sf.buff <- st_buffer(nzlu.munis.t1.sf, dist = radius)

# Perform the spatial join to calculate statistics
nzlu.munis.t1.sf.fin <- st_join(nzlu.munis.t1.sf.buff, nzlu.munis.t1.sf) %>%
  group_by(GEOID_full.x) %>%
  summarize(zri_st_2006_median = median(zri_st_2006.y, na.rm = TRUE), .groups = "drop") %>%
  rename(GEOID_full = GEOID_full.x)

## export non-shapefile version ##
nzlu.munis.t1.spst <- as.data.frame(nzlu.munis.t1.sf.fin)
class(nzlu.munis.t1.spst)

## export file ##

save(nzlu.munis.t1.spst, 
     file = "101_nzlu_t1_sp_res.Rda")


#################
## create maps ##
#################

## need to get overall legend as separate step ##

usa <- ggplot() +
  geom_sf(data = nzlu.munis.t2.sf, aes(fill = zri_st_2022), color = "black") +
  scale_fill_gradient2(low = "darkgreen",mid = "gold", high = "red") + 
  theme_void() + 
  theme(legend.position.inside = c(0.7, 0.7)) + 
  labs(fill = "ZRI 2019 \u2013 2022")

## thanks to Copilot for help here 
## extract legend
legend <- get_legend(usa)

## display legend as a plot
ggdraw(legend)

###############################
## now do specific msa plots ##
###############################

## set same color gradient for the entire sample ##

color_gradient <- colorRampPalette(c("darkgreen", "gold", "red"))(nrow(nzlu.munis.t2))
nzlu.munis.t2 <- nzlu.munis.t2 %>%
  mutate(fill_color = color_gradient[rank(nzlu.munis.t2$zri_st_2022, ties.method = "first")])

## now, bring on the rest of the munis and their shapefiles ##

load("005_muni_msa_2022.Rda")

## account for princeton, nj merger ##
st.cs.df$GEOID_full[st.cs.df$GEOID_full == "3402160915"] <- "3402160900"


## trenton msa ##

tp.munis <- muni.msa.2022 %>%
  filter(cbsa10 == "45940")

tp.msa <- nzlu.munis.t2 %>%
  select(GEOID_full,
         zri_st_2022,
         fill_color) %>%
  right_join(tp.munis, "GEOID_full") %>%
  left_join(st.cs.df, "GEOID_full")

tp.msa.sf <- st_as_sf(tp.msa)

ggplot() +
  geom_sf(data = tp.msa.sf, aes(fill = factor(fill_color)), color = "black") +
  scale_fill_identity() +
  ggtitle("Trenton-Princeton MSA")+
  theme_void()

## milwaukee msa ##

mke.munis <- muni.msa.2022 %>%
  filter(cbsa10 == "33340")

mke.msa1 <- nzlu.munis.t2 %>%
  select(GEOID_full,
         zri_st_2022,
         fill_color) %>%
  right_join(mke.munis, "GEOID_full") %>%
  left_join(st.cs.df, "GEOID_full") %>%
  select(GEOID_full,
         NAME.x,
         zri_st_2022,
         fill_color,
         cbsa10,
         cbsaname10,
         ALAND,
         AWATER,
         INTPTLAT,
         INTPTLON,
         geometry) %>%
  filter(!is.na(ALAND))


mke.msa2 <- nzlu.munis.t2 %>%
  select(GEOID_full,
         zri_st_2022,
         fill_color) %>%
  right_join(mke.munis, "GEOID_full") %>%
  left_join(st.pl.df, "GEOID_full") %>%
  select(GEOID_full,
         NAME.x,
         zri_st_2022,
         fill_color,
         cbsa10,
         cbsaname10,
         ALAND,
         AWATER,
         INTPTLAT,
         INTPTLON,
         geometry) %>%
  filter(!is.na(ALAND))

mke.msa <- rbind(mke.msa1,
                 mke.msa2)


mke.msa.sf <- st_as_sf(mke.msa)

ggplot() +
  geom_sf(data = mke.msa.sf, aes(fill = factor(fill_color)), color = "black") +
  scale_fill_identity() +
  theme_void() + 
  ggtitle("Milwaukee-Waukesha MSA") +
  guides(fill="none")



## san antonio ##

sa.munis <- muni.msa.2022 %>%
  filter(cbsa10 == "41700")

sa.msa1 <- nzlu.munis.t2 %>%
  select(GEOID_full,
         zri_st_2022,
         fill_color) %>%
  right_join(sa.munis, "GEOID_full") %>%
  left_join(st.cs.df, "GEOID_full") %>%
  select(GEOID_full,
         NAME.x,
         zri_st_2022,
         fill_color,
         cbsa10,
         cbsaname10,
         ALAND,
         AWATER,
         INTPTLAT,
         INTPTLON,
         geometry) %>%
  filter(!is.na(ALAND))


sa.msa2 <- nzlu.munis.t2 %>%
  select(GEOID_full,
         zri_st_2022,
         fill_color) %>%
  right_join(sa.munis, "GEOID_full") %>%
  left_join(st.pl.df, "GEOID_full") %>%
  select(GEOID_full,
         NAME.x,
         zri_st_2022,
         fill_color,
         cbsa10,
         cbsaname10,
         ALAND,
         AWATER,
         INTPTLAT,
         INTPTLON,
         geometry) %>%
  filter(!is.na(ALAND))

sa.msa <- rbind(sa.msa1,
                sa.msa2)


sa.msa.sf <- st_as_sf(sa.msa)

ggplot() +
  geom_sf(data = sa.msa.sf, aes(fill = factor(fill_color)), color = "black") +
  scale_fill_identity() +
  theme_void() + 
  ggtitle("San Antonio-New Braunfels MSA") +
  guides(fill="none")



## pittsburgh ##

pitt.munis <- muni.msa.2022 %>%
  filter(cbsa10 == "38300")

pitt.msa1 <- nzlu.munis.t2 %>%
  select(GEOID_full,
         zri_st_2022,
         fill_color) %>%
  right_join(pitt.munis, "GEOID_full") %>%
  left_join(st.cs.df, "GEOID_full") %>%
  select(GEOID_full,
         NAME.x,
         zri_st_2022,
         fill_color,
         cbsa10,
         cbsaname10,
         ALAND,
         AWATER,
         INTPTLAT,
         INTPTLON,
         geometry) %>%
  filter(!is.na(ALAND))


pitt.msa2 <- nzlu.munis.t2 %>%
  select(GEOID_full,
         zri_st_2022,
         fill_color) %>%
  right_join(pitt.munis, "GEOID_full") %>%
  left_join(st.pl.df, "GEOID_full") %>%
  select(GEOID_full,
         NAME.x,
         zri_st_2022,
         fill_color,
         cbsa10,
         cbsaname10,
         ALAND,
         AWATER,
         INTPTLAT,
         INTPTLON,
         geometry) %>%
  filter(!is.na(ALAND))

pitt.msa <- rbind(pitt.msa1,
                  pitt.msa2)


pitt.msa.sf <- st_as_sf(pitt.msa)

ggplot() +
  geom_sf(data = pitt.msa.sf, aes(fill = factor(fill_color)), color = "black") +
  scale_fill_identity() +
  theme_void() + 
  ggtitle("Pittsburgh MSA") +
  guides(fill="none")



## END OF PROGRAM ##

#sink()



